package com.galeno.sparksql

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

/**
 * @Title: sparkSql01.scala
 * @Description: Spark SQL demo — DataFrame filtering/aggregation plus an equivalent SQL query.
 * @author galeno
 * @date 2021/9/4 19:44
 */
object sparkSql01 {

  /**
   * Entry point: reads battle data from a headerless CSV, then demonstrates
   * equivalent DataFrame-API and SQL ways of filtering and aggregating it.
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's verbose console output. The root package of the
    // Spark/Hadoop loggers is "org"; the original logger name "log"
    // matched no actual logger, so nothing was suppressed.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession.builder()
      .appName(this.getClass.getName)
      .master("local")
      .getOrCreate()

    try {
      // No header row in the file, so columns are named explicitly afterwards.
      val df = spark.read.csv("data/battel.txt")
      val df1: DataFrame = df.toDF("id", "name", "role", "attack")

      // Show the table contents (truncate = false prints full cell values).
      df1.show(100, false)
      // Show the schema — plain CSV reads yield all-string columns.
      df1.printSchema()

      // Rows whose attack power is greater than 400.
      val res1: Dataset[Row] = df1.where("attack>400")
      res1.show()

      // Average attack per role — two equivalent spellings of the same agg.
      df1.groupBy("role").agg(("attack", "avg")).show()
      df1.groupBy("role").agg("attack" -> "avg").show()

      /**
       * The same aggregation expressed as a SQL query over a temp view.
       * NOTE(review): the SQL filter uses >= 400 while the DataFrame filter
       * above uses > 400 — presumably intentional for demo purposes.
       */
      df1.createTempView("battel")
      val res3: DataFrame = spark.sql(
        """
          |
          |select
          | role,
          | avg(attack) as avgact
          |from
          |battel
          |where
          |attack >=400
          |group by
          |role
          |
          |""".stripMargin)
      res3.show()
    } finally {
      // Release the local Spark context; the original never stopped the
      // session, leaking its resources until JVM exit.
      spark.stop()
    }
  }

}
